#!/bin/bash

## clean.fq.gz -> sorted.markdup.bam

#Shell options
working_dir=$1
sample=$2

#Tools
trimmomatic=/picb/lilab/tools/Trimmomatic-0.39/trimmomatic-0.39.jar
adapters=/picb/lilab/tools/Trimmomatic-0.39/adapters/
bwa=/picb/lilab/tools/bwa-0.7.17/bwa
samtools=/picb/tools/samtools-1.9/
gatk=/picb/lilab/tools/gatk-4.2.6.1/gatk

#Reference
reference=/picb/lilab5/share_data/ref/hg38/
bundle=/home/lilabguest7/lilab5_cxf/ref/gatk_bundle/ensembl_hg38

#Directory
if [ ! -d ${working_dir}/cleanfq/ ]
then mkdir -p ${working_dir}/cleanfq
fi 

if [ ! -d ${working_dir}/qc/fastq_qc ]
then mkdir -p ${working_dir}/qc/fastq_qc
fi 

## Filter adapters Trimmomatic
time java -jar ${trimmomatic} PE \
    ${working_dir}/rawfq/${sample}_1.fq.gz ${working_dir}/rawfq/${sample}_2.fq.gz \
    ${working_dir}/cleanfq/${sample}.paired.1.fq.gz ${sample}.unpaired.1.fq.gz \
    ${working_dir}/cleanfq/${sample}.paired.2.fq.gz ${sample}.unpaired.2.fq.gz \
    ILLUMINACLIP:$adapters/TruSeq3-PE-2.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36 

#Mapping bwa-mem
time $bwa mem -t 6 -M -Y -R "@RG\tID:${sample}\tSM:${sample}\tLB:${sample}\tPL:Illumina" \
             $reference/Homo_sapiens.GRCh38.dna.primary_assembly.fa \
             ${working_dir}/cleanfq/{sample}.paired.1.fq.gz ${working_dir}/cleanfq/${sample}.paired.2.fq.gz | samtools view -Sb - > ${working_dir}/bam/${sample}.bam
             

#Sort samtools 
time $samtools sort -@ 4 -m 4G -O bam -o ${working_dir}/bam/sort/${sample}.sorted.bam ${working_dir}/bam/${sample}.bam 

#Mark Duplicates GATK
time $gatk MarkDuplicates \
    -I ${working_dir}/bam/sort/${sample}.sorted.bam \
    -M ${working_dir}/bam/markdup/${sample}.markdup_metrics.txt \
    -O ${working_dir}/bam/markdup/${sample}.sorted.markdup.bam 

#Index samtools
time $samtools index ${working_dir}/bam/markdup/${sample}.sorted.markdup.bam 

#BQSR
time $gatk BaseRecalibrator \
    -R $reference/Homo_sapiens.GRCh38.dna.primary_assembly \
    -I ${working_dir}/bam/markdup/${sample}.sorted.markdup.bam \
    --known-sites ${bundle}/Mills_and_1000G_gold_standard.indels.hg38.ensembl.vcf.gz \
    --known-sites ${bundle}/dbsnp_146.hg38.ensembl.vcf.gz \
    -O ${working_dir}/bam/bqsr/${sample}.sorted.markdup.recal_data.table

time $gatk ApplyBQSR \
    --bqsr-recal-file ${working_dir}/bam/bqsr/${sample}.sorted.markdup.recal_data.table \
    -R $reference/Homo_sapiens.GRCh38.dna.primary_assembly.fa \
    -I ${working_dir}/bam/markdup/${sample}.sorted.markdup.bam \
    -O ${working_dir}/bam/bqsr/${sample}.sorted.markdup.BQSR.bam

#Index samtools
time $samtools index ${working_dir}/bam/${sample}.sorted.markdup.BQSR.bam 



